Observed data
Observed log-chlorophyll at representative station for the St. Lucie Estuary
library(tidyverse)
library(lubridate)
library(mgcv)
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')
# flow data, left moving window average of 30 days
data(sl_fldat)
fl_dat <- sl_fldat %>%
rename(date = Date) %>%
mutate(
qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
)
# format the data to model
data(sl_dat)
sl_mod <- sl_dat %>%
rename(date = Date) %>%
mutate(
doy = yday(date),
dec_time = decimal_date(date),
yr = year(date),
yr = factor(yr),
mo = month(date, label = T)
) %>%
left_join(fl_dat, by = 'date') %>%
mutate(
flo = log(qsm),
chl = log(1 + chl)
) %>%
select(-q, -qsm, -sal)
# plot, all
p <- ggplot(sl_mod, aes(x = date, y = chl)) +
geom_line() +
theme_bw()
ggplotly(p)
# boxplot, by years
p <- ggplot(sl_mod, aes(x = yr, y = chl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sl_mod, aes(x = mo, y = chl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
GAMs with annual, seasonal trends
Some simple GAMs to explore annual, seasonal trends.
# smooths to evaluate
smths <- c(
"s(dec_time, bs = 'tp')",
"s(doy, bs = 'cc')",
"te(dec_time, doy, bs = c('tp', 'cc'))"
)
# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
frm <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ') %>%
paste('chl ~ ', .) %>%
formula
})
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sl_mod,
na.action = na.exclude
)
})
names(mods) <- paste0('mod', seq_along(mods))
Summary stats of annual, seasonal models:
# smoother stats of GAMs
map(mods, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable(digits = 2)
| mod1 |
s(dec_time) |
1.49 |
1.83 |
8.05 |
0.00 |
| mod2 |
s(doy) |
4.21 |
8.00 |
6.52 |
0.00 |
| mod3 |
te(dec_time,doy) |
8.43 |
11.10 |
5.17 |
0.00 |
| mod4 |
s(dec_time) |
1.88 |
2.35 |
6.03 |
0.00 |
| mod4 |
s(doy) |
4.22 |
8.00 |
6.63 |
0.00 |
| mod5 |
s(dec_time) |
1.00 |
1.00 |
5.66 |
0.02 |
| mod5 |
te(dec_time,doy) |
9.56 |
15.00 |
3.33 |
0.00 |
| mod6 |
s(doy) |
4.21 |
8.00 |
5.84 |
0.00 |
| mod6 |
te(dec_time,doy) |
2.74 |
3.81 |
4.00 |
0.00 |
| mod7 |
s(dec_time) |
1.00 |
1.00 |
10.64 |
0.00 |
| mod7 |
s(doy) |
4.16 |
8.00 |
5.16 |
0.00 |
| mod7 |
te(dec_time,doy) |
2.85 |
15.00 |
0.28 |
0.16 |
# summary stats of GAMs
map(mods, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable(digits = 2)
| mod1 |
654.57 |
0.04 |
| mod2 |
621.13 |
0.13 |
| mod3 |
619.47 |
0.15 |
| mod4 |
609.37 |
0.16 |
| mod5 |
619.10 |
0.15 |
| mod6 |
609.19 |
0.17 |
| mod7 |
608.68 |
0.17 |
# prediction data
pred_dat <- crossing(
yr = seq(floor(min(sl_mod$dec_time)), ceiling(max(sl_mod$dec_time))),
doy = seq(1, 365, by = 5)
) %>%
unite('date', yr, doy, sep = '-', remove = F) %>%
mutate(
date = as.Date(date, format = '%Y-%j'),
dec_time = decimal_date(date),
mo = month(date, label = TRUE),
yr = year(date)
) %>%
left_join(., fl_dat[, c('date', 'qsm')]) %>%
mutate(flo = log(qsm)) %>%
select(-qsm)
# predictions
sl_res <- map(mods, function(x){
pred_dat %>%
mutate(
pred = predict(x, newdata = pred_dat)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sl_res, aes(x = date)) +
geom_point(data = sl_mod, aes(y = chl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
# plot
p <- ggplot(sl_res, aes(x = doy, group = factor(yr), colour = yr)) +
geom_line(aes(y = pred)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
) +
facet_wrap(~ mods, ncol = 2)
ggplotly(p)
GAMs with annual, seasonal, and flow trends
Adding flow data to the model:
# smooths to evaluate
smths <- c(
"s(dec_time, bs = 'tp')",
"s(doy, bs = 'cc')",
"s(flo, bs = 'tp')",
"te(flo, doy, bs = c('tp', 'cc'))",
"te(flo, dec_time, bs = c('tp', 'tp'))",
"te(dec_time, doy, bs = c('tp', 'cc'))",
"te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp'))"
)
# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
frm <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ') %>%
paste('chl ~ ', .) %>%
formula
})
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods2 <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sl_mod,
na.action = na.exclude
)
})
names(mods2) <- paste0('mod', seq_along(mods2))
save(mods2, file = 'data/sl_mods2.RData', compress = 'xz')
Summary stats of best year/season model, year/season/flow model:
data(sl_mods2)
# best model with only season, year
best1 <- map(mods, ~ summary(.x)$r.sq) %>%
unlist %>%
which.max %>%
mods[[.]]
# best model with season, year, flow
best2 <- map(mods2, ~ summary(.x)$r.sq) %>%
unlist %>%
which.max %>%
mods2[[.]]
best <- list(best1 = best1, best2 = best2)
# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable(digits = 2)
| best1 |
s(dec_time) |
1.00 |
1.00 |
10.64 |
0.00 |
| best1 |
s(doy) |
4.16 |
8.00 |
5.16 |
0.00 |
| best1 |
te(dec_time,doy) |
2.85 |
15.00 |
0.28 |
0.16 |
| best2 |
s(dec_time) |
1.00 |
1.00 |
1.93 |
0.17 |
| best2 |
s(doy) |
2.94 |
8.00 |
0.71 |
0.05 |
| best2 |
s(flo) |
6.78 |
7.86 |
2.96 |
0.00 |
| best2 |
te(flo,doy) |
2.96 |
12.00 |
0.58 |
0.01 |
| best2 |
te(flo,dec_time) |
0.21 |
15.00 |
0.02 |
0.09 |
| best2 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.81 |
| best2 |
te(dec_time,doy,flo) |
19.86 |
48.00 |
1.01 |
0.00 |
# summary stats of GAMs
map(best, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable(digits = 2)
| best1 |
608.68 |
0.17 |
| best2 |
556.83 |
0.34 |
# predictions
sl_res2 <- map(best, function(x){
pred_dat %>%
mutate(
pred = predict(x, newdata = pred_dat)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sl_res2, aes(x = date)) +
geom_point(data = sl_mod, aes(y = chl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
ptheme <- theme(
axis.title.x = element_blank(),
axis.title.y = element_blank()
)
cols <- 'Spectral'
pb1 <- dynagam(best1, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
theme(legend.position = 'none') +
ggtitle('Best 1')
pb2 <- dynagam(best2, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
ggtitle('Best2')
pleg <- g_legend(pb2)
pb2 <- pb2 +
theme(legend.position = 'none')
grid.arrange(
pleg,
arrangeGrob(pb1, pb2, ncol = 2, bottom = 'lnQ', left = 'lnchl'),
ncol = 1,
heights = c(0.1, 1)
)

Adding nitrogen and turbidity covariates
# formula for best annual, seasonal, flow model
strt <- best2$formula %>%
as.character
smths <- c(
"s(nh, bs = 'tp')",
"s(tss, bs = 'tp')",
"te(nh, tss, bs = c('tp', 'tp'))"
)
# get all combinations of smoothers to model, one to many
frmstab <- list()
frms <- list()
for(i in seq_along(smths)){
# for the summary table
frmtab <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ')
})
# for the model
frm <- sapply(frmtab, function(x){
paste('chl ~', strt[3], '+', x) %>%
formula
})
frmstab <- c(frmstab, frmtab)
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods3 <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sl_mod,
na.action = na.exclude
)
})
names(mods3) <- paste0('mod', seq_along(mods3))
Summary of all nutrient, turbidity models
# smoother stats of GAMs
map(mods3, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable(digits = 2)
| mod1 |
s(dec_time) |
7.15 |
8.14 |
2.28 |
0.02 |
| mod1 |
s(doy) |
4.43 |
8.00 |
4.71 |
0.00 |
| mod1 |
s(flo) |
3.55 |
4.35 |
2.76 |
0.03 |
| mod1 |
te(flo,doy) |
0.80 |
15.00 |
0.07 |
0.08 |
| mod1 |
te(flo,dec_time) |
2.07 |
16.00 |
0.21 |
0.05 |
| mod1 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.66 |
| mod1 |
te(dec_time,doy,flo) |
7.50 |
48.00 |
0.30 |
0.00 |
| mod1 |
s(nh) |
3.11 |
3.79 |
8.77 |
0.00 |
| mod2 |
s(dec_time) |
7.36 |
8.30 |
1.77 |
0.08 |
| mod2 |
s(doy) |
3.73 |
8.00 |
2.14 |
0.00 |
| mod2 |
s(flo) |
6.46 |
7.63 |
1.31 |
0.19 |
| mod2 |
te(flo,doy) |
0.20 |
15.00 |
0.01 |
0.18 |
| mod2 |
te(flo,dec_time) |
3.54 |
16.00 |
0.67 |
0.00 |
| mod2 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.62 |
| mod2 |
te(dec_time,doy,flo) |
9.43 |
48.00 |
0.38 |
0.00 |
| mod2 |
s(tss) |
4.01 |
4.92 |
3.55 |
0.00 |
| mod3 |
s(dec_time) |
8.04 |
8.73 |
2.74 |
0.01 |
| mod3 |
s(doy) |
4.41 |
8.00 |
5.71 |
0.00 |
| mod3 |
s(flo) |
1.00 |
1.00 |
5.88 |
0.02 |
| mod3 |
te(flo,doy) |
0.72 |
15.00 |
0.10 |
0.01 |
| mod3 |
te(flo,dec_time) |
3.87 |
16.00 |
1.28 |
0.00 |
| mod3 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.59 |
| mod3 |
te(dec_time,doy,flo) |
4.43 |
48.00 |
0.15 |
0.02 |
| mod3 |
te(nh,tss) |
10.99 |
12.32 |
5.54 |
0.00 |
| mod4 |
s(dec_time) |
7.87 |
8.63 |
2.28 |
0.02 |
| mod4 |
s(doy) |
4.33 |
8.00 |
5.69 |
0.00 |
| mod4 |
s(flo) |
4.66 |
5.69 |
2.82 |
0.01 |
| mod4 |
te(flo,doy) |
0.03 |
15.00 |
0.00 |
0.10 |
| mod4 |
te(flo,dec_time) |
2.70 |
16.00 |
0.36 |
0.01 |
| mod4 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.62 |
| mod4 |
te(dec_time,doy,flo) |
6.09 |
48.00 |
0.23 |
0.01 |
| mod4 |
s(nh) |
3.36 |
4.07 |
9.03 |
0.00 |
| mod4 |
s(tss) |
4.52 |
5.49 |
3.83 |
0.00 |
| mod5 |
s(dec_time) |
8.05 |
8.73 |
2.78 |
0.00 |
| mod5 |
s(doy) |
4.40 |
8.00 |
5.36 |
0.00 |
| mod5 |
s(flo) |
1.00 |
1.00 |
5.66 |
0.02 |
| mod5 |
te(flo,doy) |
0.98 |
15.00 |
0.12 |
0.02 |
| mod5 |
te(flo,dec_time) |
3.94 |
16.00 |
1.21 |
0.00 |
| mod5 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.60 |
| mod5 |
te(dec_time,doy,flo) |
4.37 |
48.00 |
0.15 |
0.02 |
| mod5 |
s(nh) |
3.62 |
4.51 |
5.90 |
0.00 |
| mod5 |
te(nh,tss) |
9.82 |
20.00 |
1.52 |
0.00 |
| mod6 |
s(dec_time) |
7.86 |
8.63 |
2.48 |
0.01 |
| mod6 |
s(doy) |
4.24 |
8.00 |
5.00 |
0.00 |
| mod6 |
s(flo) |
3.31 |
4.07 |
3.56 |
0.01 |
| mod6 |
te(flo,doy) |
0.23 |
15.00 |
0.03 |
0.07 |
| mod6 |
te(flo,dec_time) |
2.50 |
16.00 |
0.33 |
0.01 |
| mod6 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.62 |
| mod6 |
te(dec_time,doy,flo) |
6.31 |
48.00 |
0.24 |
0.01 |
| mod6 |
s(tss) |
4.60 |
5.58 |
3.60 |
0.00 |
| mod6 |
te(nh,tss) |
2.25 |
14.00 |
2.78 |
0.00 |
| mod7 |
s(dec_time) |
7.94 |
8.67 |
2.52 |
0.01 |
| mod7 |
s(doy) |
4.34 |
8.00 |
4.73 |
0.00 |
| mod7 |
s(flo) |
3.13 |
3.84 |
3.49 |
0.01 |
| mod7 |
te(flo,doy) |
0.36 |
15.00 |
0.05 |
0.08 |
| mod7 |
te(flo,dec_time) |
2.54 |
16.00 |
0.34 |
0.01 |
| mod7 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.64 |
| mod7 |
te(dec_time,doy,flo) |
5.96 |
48.00 |
0.23 |
0.01 |
| mod7 |
s(nh) |
1.00 |
1.00 |
0.01 |
0.93 |
| mod7 |
s(tss) |
4.42 |
5.37 |
3.78 |
0.00 |
| mod7 |
te(nh,tss) |
2.17 |
15.00 |
1.72 |
0.00 |
# summary stats of GAMs
map(mods3, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
mutate(smth_added = frmstab) %>%
select(name, smth_added, everything()) %>%
kable(digits = 2)
| mod1 |
s(nh, bs = ‘tp’) |
497.81 |
0.37 |
| mod2 |
s(tss, bs = ‘tp’) |
534.20 |
0.35 |
| mod3 |
te(nh, tss, bs = c(‘tp’, ‘tp’)) |
481.66 |
0.41 |
| mod4 |
s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) |
481.38 |
0.41 |
| mod5 |
s(nh, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) |
481.83 |
0.41 |
| mod6 |
s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) |
478.98 |
0.41 |
| mod7 |
s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) |
479.77 |
0.41 |
Summary stats of best three three models:
# best model with season, year, flow
best3 <- map(mods3, ~ summary(.x)$r.sq) %>%
unlist %>%
which.max %>%
mods3[[.]]
best <- list(best1 = best1, best2 = best2, best3 = best3)
# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable(digits = 2)
| best1 |
s(dec_time) |
1.00 |
1.00 |
10.64 |
0.00 |
| best1 |
s(doy) |
4.16 |
8.00 |
5.16 |
0.00 |
| best1 |
te(dec_time,doy) |
2.85 |
15.00 |
0.28 |
0.16 |
| best2 |
s(dec_time) |
1.00 |
1.00 |
1.93 |
0.17 |
| best2 |
s(doy) |
2.94 |
8.00 |
0.71 |
0.05 |
| best2 |
s(flo) |
6.78 |
7.86 |
2.96 |
0.00 |
| best2 |
te(flo,doy) |
2.96 |
12.00 |
0.58 |
0.01 |
| best2 |
te(flo,dec_time) |
0.21 |
15.00 |
0.02 |
0.09 |
| best2 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.81 |
| best2 |
te(dec_time,doy,flo) |
19.86 |
48.00 |
1.01 |
0.00 |
| best3 |
s(dec_time) |
8.05 |
8.73 |
2.78 |
0.00 |
| best3 |
s(doy) |
4.40 |
8.00 |
5.36 |
0.00 |
| best3 |
s(flo) |
1.00 |
1.00 |
5.66 |
0.02 |
| best3 |
te(flo,doy) |
0.98 |
15.00 |
0.12 |
0.02 |
| best3 |
te(flo,dec_time) |
3.94 |
16.00 |
1.21 |
0.00 |
| best3 |
te(dec_time,doy) |
0.00 |
15.00 |
0.00 |
0.60 |
| best3 |
te(dec_time,doy,flo) |
4.37 |
48.00 |
0.15 |
0.02 |
| best3 |
s(nh) |
3.62 |
4.51 |
5.90 |
0.00 |
| best3 |
te(nh,tss) |
9.82 |
20.00 |
1.52 |
0.00 |
# summary stats of GAMs
map(best, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable(digits = 2)
| best1 |
608.68 |
0.17 |
| best2 |
556.83 |
0.34 |
| best3 |
481.83 |
0.41 |
# predictions
sl_res3 <- map(best, function(x){
sl_mod %>%
mutate(
pred = predict(x)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sl_res3, aes(x = date)) +
geom_point(data = sl_mod, aes(y = chl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)